套件載入

經營現況

## Rows: 817741 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): TRANSACTION_DT, CUSTOMER_ID, AGE_GROUP, PIN_CODE, PRODUCT_ID
## dbl (4): PRODUCT_SUBCLASS, AMOUNT, ASSET, SALES_PRICE
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: The `...` argument of `group_indices()` is deprecated as of dplyr 1.0.0.
## ℹ Please `group_by()` first
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Package

#rm(list=ls(all=T))
options(digits=4, scipen=12)
library(dplyr)
library(ggplot2)
library(caTools)
library(ROCR)
library(googleVis)
## 
## Welcome to googleVis version 0.7.3
## 
## Please read Google's Terms of Use
## before you start using the package:
## https://developers.google.com/terms/
## 
## Note, the plot method of googleVis will by default use
## the standard browser to display its output.
## 
## See the googleVis package vignettes for more details,
## or visit https://mages.github.io/googleVis/.
## 
## To suppress this message use:
## suppressPackageStartupMessages(library(googleVis))
library(chorddiag)
library(Cairo)
#library(mosaic)
X0<-X0 %>% filter(!age=="a99")
X = load("data/tf4.rdata")
B<-B %>% filter(!age=="a99")

RFM 分群

group_by(B, age) %>% summarise(
  recent=mean(r), 
  freq=mean(f), 
  money=mean(m), 
  size=n() ) %>% 
  mutate( revenue = size*money/1000 )  %>% 
  filter(size > 1) %>% 
  ggplot(aes(x=freq, y=money)) +
  geom_point(aes(size=revenue, col=recent),alpha=0.5) +
  scale_size(range=c(4,30)) +
  scale_color_gradient(low="green",high="red") +
  scale_x_log10() + scale_y_log10(limits=c(500,1500)) + 
  geom_text(aes(label = age ),size=3) +
  theme_bw() + guides(size=F) +
  labs(title="Customer Segements",
       subtitle="(bubble_size:revenue_contribution; text:group_size)",
       color="Recency") +
  xlab("Frequency") + ylab("Average Transaction Amount")
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
根據RFM分群,我們可以選出兩個重要客戶族群:
  1. 重要挽留客戶:a29、a34、a39
  2. 重要保持客戶:a49、a44、a54

模型預期結果

# 根據年齡分成兩個群組
B <- B %>%
  mutate(AgeGroup = case_when(
    age %in% c("a29","a34","a39","a59") ~ "重要挽留客戶",
    age %in% c("a44", "a49", "a54") ~ "重要保持客戶",
    TRUE ~ "Other"
  ))


# 根據年齡群組計算統計信息
summary_data <- B %>%
  group_by(AgeGroup) %>%
  summarise(n = n(), Buy = mean(Buy), Rev = mean(Rev))

# 繪製泡泡圖
p <- ggplot(summary_data, aes(Buy, Rev, size = n, label = AgeGroup)) + 
  geom_point(alpha = 0.5, color = 'gold') + 
  geom_text(size = 4) + 
  labs(title = "Age Group Comparison (size: no. customers)") +
  xlab("Avg. Buying Probability") + ylab("Avg. Expected Revenue") +
  scale_size(range = c(4, 20)) + theme_bw()

# 將 ggplot 圖轉換為 plotly 圖
ggplotly(p)
library(dplyr)
library(ggplot2)
library(plotly)

# 根據年齡分成兩個群組
B <- B %>%
  mutate(AgeGroup = case_when(
    age %in% c("a29","a34","a39","a59") ~ "重要挽留客戶",
    age %in% c("a44", "a49", "a54") ~ "重要保持客戶",
  )) %>%
  filter(!is.na(AgeGroup))  # 排除沒有指定群組的觀測值

# 根據年齡群組計算統計信息
summary_data <- B %>%
  group_by(AgeGroup) %>%
  summarise(n = n(), Buy = mean(Buy), Rev = mean(Rev))

# 繪製泡泡圖
p <- ggplot(summary_data, aes(Buy, Rev, size = n, label = AgeGroup)) + 
  geom_point(alpha = 0.5, color = 'gold') + 
  geom_text(size = 4) + 
  labs(title = "Age Group Comparison (size: no. customers)") +
  xlab("Avg. Buying Probability") + ylab("Avg. Expected Revenue") +
  scale_size(range = c(4, 20)) + theme_bw()

# 將 ggplot 圖轉換為 plotly 圖
ggplotly(p)
margin = 0.17  # assume margin = 0.17
DP = function(x,m0,b0,a0) {m0*plogis((10/a0)*(x-b0))}
mm=c(0.30, 0.20)
bb=c(  20,   20)
aa=c(  40,   40) 

X <- seq(0, 60, 2)

data <- do.call(rbind, lapply(1:length(mm), function(i) data.frame(
  color = c('重要挽留客戶', '重要保持客戶')[i], Cost = X,
  Gain = DP(X, mm[i], bb[i], aa[i])
))) %>%
  data.frame

ggplot(data, aes(x = Cost, y = Gain, col = color)) +
  geom_line(size = 1.5, alpha = 0.5) +
  theme_bw() +
  ggtitle("Prob. Function: f(x|m,b,a)")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

mm=c(0.20, 0.25)
bb=c(  15,   25)
aa=c(  40,   40) 

X <- seq(0, 60, 2)

data <- do.call(rbind, lapply(1:length(mm), function(i) data.frame(
  color = c('重要挽留客戶', '重要保持客戶')[i], Cost = X,
  Gain = DP(X, mm[i], bb[i], aa[i])
))) %>%
  data.frame

ggplot(data, aes(x = Cost, y = Gain, col = color)) +
  geom_line(size = 1.5, alpha = 0.5) +
  theme_bw() +
  ggtitle("Prob. Function: f(x|m,b,a)")

mm=c(0.20, 0.25)
bb=c(  25,   30)
aa=c(  40,   40) 

# 選擇符合條件的資料
selected_data <- B[B$age %in% c("a29", "a34", "a39"), ]

X = seq(10, 60, 1) 
df = do.call(rbind, lapply(1:length(mm), function(i) {
  sapply(X, function(x) {
    dp = pmin(1-selected_data$Buy, DP(x,mm[i],bb[i],aa[i]))
    eR = dp*selected_data$Rev*margin - x
    c(i=i, x=x, eR.ALL=sum(eR), N=sum(eR>0), eR.SEL=sum(eR[eR > 0]) )
    }) %>% t %>% data.frame
  })) 


df %>%
  mutate_at(vars(eR.ALL, eR.SEL), function(y) round(y/1000)) %>% 
  gather('key','value',-i,-x) %>% 
  mutate(Instrument = paste0('I',i)) %>%
  ggplot(aes(x=x, y=value, col=Instrument)) + 
  geom_hline(yintercept=0, linetype='dashed', col='blue') +
  geom_line(size=1.5,alpha=0.5) + 
  xlab('工具選項(成本)') + ylab('預期收益($K)') + 
  ggtitle('行銷工具優化','假設行銷工具的效果是其成本的函數') +
    facet_wrap(~key,ncol=1,scales='free_y') + theme_bw() -> p

plotly::ggplotly(p)
mm=c(0.20, 0.25)
bb=c(  25,   30)
aa=c(  40,   40) 

# 選擇符合條件的資料
selected_data <- B[B$age %in% c("a49","a44","a54"), ]

X = seq(10, 60, 1) 
df = do.call(rbind, lapply(1:length(mm), function(i) {
  sapply(X, function(x) {
    dp = pmin(1-selected_data$Buy, DP(x,mm[i],bb[i],aa[i]))
    eR = dp*selected_data$Rev*margin - x
    c(i=i, x=x, eR.ALL=sum(eR), N=sum(eR>0), eR.SEL=sum(eR[eR > 0]) )
    }) %>% t %>% data.frame
  })) 


df %>%
  mutate_at(vars(eR.ALL, eR.SEL), function(y) round(y/1000)) %>% 
  gather('key','value',-i,-x) %>% 
  mutate(Instrument = paste0('I',i)) %>%
  ggplot(aes(x=x, y=value, col=Instrument)) + 
  geom_hline(yintercept=0, linetype='dashed', col='blue') +
  geom_line(size=1.5,alpha=0.5) + 
  xlab('工具選項(成本)') + ylab('預期收益($K)') + 
  ggtitle('行銷工具優化','假設行銷工具的效果是其成本的函數') +
    facet_wrap(~key,ncol=1,scales='free_y') + theme_bw() -> p

plotly::ggplotly(p)
library(dplyr)
mm=c(0.20, 0.25, 0.15)
bb=c(  25,   30,   15)
aa=c(  40,   40,   30) 
cidx = lapply(  # B's index for the 4 age groups
  list(c("a29","a34","a39"),
       c("a49","a44","a54"),c("a24","a64","a69")),
  function(v) B$age %in% v)  

X = seq(10, 60, 1) 
df = do.call(rbind, lapply(1:length(mm), function(i) {
  sapply(X, function(x) {
    dp = pmin(1- B$Buy[ cidx[[i]] ]  , DP(x,mm[i],bb[i],aa[i]))
    eR = dp * B$Rev[ cidx[[i]] ] * margin - x
    c(i=i, x=x, eR.ALL=sum(eR), N=sum(eR>0), eR.SEL=sum(eR[eR > 0]) )
    }) %>% t %>% data.frame
  })) 

result = group_by(df, i) %>% top_n(1,eR.SEL)
print(result)
## # A tibble: 53 × 5
## # Groups:   i [3]
##        i     x  eR.ALL     N eR.SEL
##    <dbl> <dbl>   <dbl> <dbl>  <dbl>
##  1     1    35 -81945.  3857 55266.
##  2     2    40 -55026.  3047 55254.
##  3     3    10      0      0     0 
##  4     3    11      0      0     0 
##  5     3    12      0      0     0 
##  6     3    13      0      0     0 
##  7     3    14      0      0     0 
##  8     3    15      0      0     0 
##  9     3    16      0      0     0 
## 10     3    17      0      0     0 
## # ℹ 43 more rows
# x   eR.ALL  N    eR.SEL
 #35  -93627  4002 56924 重要
 #40  -55026  3047 55254 挽留
#Tafeng每個月的營收大約是2,500萬,獲利大約是390萬,一般零售商的行銷經費大約是獲利的5%~15%
#g = 0.156   
#totalR = sum(Z0$price) # 102,585,246 / 4 = 25,646,311.5
#totalC = sum(Z0$cost) # 86,781,697 totalR - totalC # 15,803,549 / 4 = 3,950,887.25
g = 0.3   # 平均毛利率  
N = 36    # 估計CLV期間(三年)
d = 0.01  # 資本利率
B$CLV = g * B$Rev * rowSums(sapply(
  0:N, function(i) (B$Buy/(1+d))^i ) )

summary(B$CLV)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      35     296     471    1365     810 1145741
ggplot(B, aes(CLV)) + 
  geom_histogram(bins=30, fill="green",alpha=0.6) + 
  scale_x_log10() + facet_wrap(~age)

B <- B %>%
  mutate(age_group = case_when(
    age %in% c("a29", "a34", "a39", "a59") ~ "重要挽留客戶",
    age %in% c("a49", "a44", "a54") ~ "重要保持客戶",
    TRUE ~ "Other"  
  ))

ggplot(B, aes(CLV)) + 
  geom_histogram(bins = 30, fill = "green", alpha = 0.6) + 
  scale_x_log10() + 
  facet_wrap(~age_group)

# MOSA = function(formula, data) mosaic(formula, data, shade=T, 
#   margins=c(0,1,0,0), labeling_args = list(rot_labels=c(0,0,0,0)),
#   gp_labels=gpar(fontsize=9), legend_args=list(fontsize=9),
#   gp_text=gpar(fontsize=7),labeling=labeling_residuals)
# 
# X0$age <- as.character(X0$age)
# custom_groups <- list(
#   c("a29", "a34", "a39", "a59"),
#   c("a44", "a49", "a54"),
#   c("a24", "a64", "a69","a99")
# )
# 
# X0$age_group <- character(length(X0$age))
# 
# for (i in seq_along(custom_groups)) {
#   group <- custom_groups[[i]]
#   X0$age_group[X0$age %in% group] <- paste(group, collapse = ",")
# }
# 
# 
# X0$wday = format(X0$date, "%a")
# MOSA(~age_group + wday, X0)
# 
# #X0$wday = format(X0$date, "%a")
# #MOSA(~age+wday, X0)

行銷方案分析

# load(data/tf0.rdata)
# group_by(Z0, age) %>% summarise(sum(price)/sum(cost) - 1)
margin = 0.5  # assume margin = 0.17
DP = function(x,m0,b0,a0) {m0*plogis((10/a0)*(x-b0))}

# # 設定目標客群
# cidx = lapply(  # B's index for the 4 age groups
#   list(c("a29","a34","a39"),
#        c("a49","a44","a54")),
#   function(v) B$age %in% v) 

# 設定目標客群
cidx = lapply(  # B's index for the 4 age groups
  list(c("a44","a49","a54")),
  #list(c("a29","a34","a39")),
  function(v) B$age %in% v) 

Fig-2: Zip Codes Fig-2: Zip Codes

Fig-2: Zip Codes
Fig-2: Zip Codes
Fig-2: Zip Codes
Fig-2: Zip Codes

mba模擬X

#cidx <- list(c("a29", "a34", "a39"))
# 會員集點
#c("a29","a34","a39") m:0.2 b:15 a:40 
# X:27 eR.ALL:836435 N:12991 eR.SEL:844141 增加回購:0.190

#c("a44","a49","a54") m:0.25 b:25 a:40
# X:38 eR.ALL:659212 N:8900  eR.SEL:673686 增加回購:0.240

# 團購
#c("a29","a34","a39") m:0.3 b:20 a:40 
# X:34 eR.ALL:1318205 N:13084  eR.SEL:1327606 增加回購:0.291

#c("a44","a49","a54") m:0.2 b:20 a:40
# X:32 eR.ALL:518662 N:8889  eR.SEL:530438 增加回購:0.190


mm <- c(0.25, 0.2)
bb <- c(25, 20)
aa <- c(40, 40)
X <- seq(2, 100, 1)

df <- do.call(rbind, lapply(1:length(mm), function(i) {
  sapply(X, function(x) {
    dp <- pmin(1 - B$Buy[cidx[[1]]], DP(x, mm[i], bb[i], aa[i]))
    dp.R <- dp * B$Rev[cidx[[1]]]  # 預期總營收增額
    eR <- dp * B$Rev[cidx[[1]]] * margin - x
    c(i = i, x = x, eR.ALL = sum(eR), N = sum(eR > 0), eR.SEL = sum(eR[eR > 0]))
  }) %>% t %>% data.frame
}))

result = group_by(df, i) %>% top_n(1,eR.SEL)

### 模擬最佳參數

# 設定目標客群
cidx <- lapply(
  list(c("a29", "a34", "a39")),
   #list(c("a49","a44","a54")),
  function(v) B$age %in% v
)

mm <- seq(0.04, 0.25, 0.01)
#mm <- 0.25
#bb <- seq(10, 50, 5)
bb <- 20
aa <- 40
#aa <- seq(10, 50, 5)
X <- seq(2, 100, 1)

param_combinations <- expand.grid(mm = mm, bb = bb, aa = aa, X = X)

df <- do.call(rbind, lapply(1:nrow(param_combinations), function(i) {
  mm_val <- param_combinations[i, 'mm']
  bb_val <- param_combinations[i, 'bb']
  aa_val <- param_combinations[i, 'aa']
  x_val <- param_combinations[i, 'X']

  dp <- pmin(1 - B$Buy[cidx[[1]]], DP(x_val, mm_val, bb_val, aa_val))
  dp.R <- dp * B$Rev[cidx[[1]]]  # 預期總營收增額
  eR <- dp * B$Rev[cidx[[1]]] * margin - x_val

  c(mm = mm_val, bb = bb_val, aa = aa_val, X = x_val, eR.ALL = sum(eR), N = sum(eR > 0), eR.SEL = sum(eR[eR > 0]))
}))

# 將結果存儲到 df 中
df <- as.data.frame(df)

“eR.SEL” 中的前五大值的索引

# # 找到 "eR.SEL" 中的最大值的索引
# max_index <- which.max(df$eR.SEL)
# 
# # 提取相應的行
# max_row <- df[max_index, ]
# 
# # 顯示結果
# print(max_row

# 找到
top_five_indices <- order(df$eR.SEL, decreasing = TRUE)[1:5]

# 提取相應的行
top_five_rows <- df[top_five_indices, ]

# 顯示結果
print(top_five_rows)
##       mm bb aa  X  eR.ALL     N  eR.SEL
## 704 0.25 20 40 33 1047850 12955 1058010
## 682 0.25 20 40 32 1047080 12991 1056572
## 726 0.25 20 40 34 1045603 12913 1056484
## 748 0.25 20 40 35 1040934 12878 1052584
## 660 0.25 20 40 31 1042574 13018 1051445